* Data processing script to create necessary variables for analysis

version 15
* clear all
 use "D:\Dropbox\Publications\Hidden cost of trustworthiness\Replication files 26 FEB 2022\SOI_1982-2019.dta" // Use your own local path
count
* Remove  records flagged by NCCS (e.g., non-501c3s)
keep if out_nccs=="IN"

* Create ratio variables and some dummies
	gen pct_donative = tot_contr / tot_revenue
	label var pct_donative "Percent donative revenue (tot_contr / tot_revenue)"
	sum pct_donative, d
	gen donative = 0
	replace donative = 1 if pct_donative > 0.50
	label var donative "Donative dummy"
	tab donative
	tab ntmaj12 donative

	gen admin_ratio = mgmt_gen_exp / total_expenses
	label var admin_ratio "Administrative expense ratio"
	sum admin_ratio if out_nccs=="IN" & donative==1, d
	
	gen fundr_ratio = fund_exp / total_expenses
	label var fundr_ratio "Fundraising expense ratio"
	sum fundr_ratio if out_nccs=="IN" & donative==1, d
	
	gen fringe_ratio = fringe_exp / sal_exp
	label var fringe_ratio "Fringe expense ratio (fringe_exp / sal_exp)"
	sum fringe_ratio if out_nccs=="IN" & donative==1, d
	
	gen years_of_na = tot_net_assets / total_expenses
	label var years_of_na "Years of net assets"
	sum years_of_na if out_nccs=="IN" & donative==1, d
	
	gen liab_asset_ratio = tot_liabilities / tot_assets
	label var liab_asset_ratio "Liabilities to assets ratio"
	sum liab_asset_ratio if out_nccs=="IN" & donative==1, d
	
	gen profit_ratio = profit / tot_revenue
	label var profit_ratio "Ratio of profit to total revenue"
	sum profit_ratio if out_nccs=="IN" & donative==1, d
	
	gen liab_dummy = 0
	replace liab_dummy = 1 if tot_liabilities > 0 & tot_liabilities < .
	label var liab_dummy "Liabilities dummy"
	
	gen int_exp_ratio = interest_exp / total_expenses
	label var int_exp_ratio "Interest expense ratio (interest_exp / total_expenses)"
	sum int_exp_ratio if out_nccs=="IN" & donative==1, d
	
	gen int_exp_dummy = 0
	replace int_exp_dummy = 1 if interest_exp > 0 & interest_exp < .
	label var int_exp_dummy "Interest expense dummy"
	tab int_exp_dummy if out_nccs=="IN" & donative==1
		
* Create logged variables

	gen log_total_expenses = ln(total_expenses)
	label var log_total_expenses "Log of total expenses"
	
	gen log_net_assets = ln(tot_net_assets)
	label var log_net_assets "Log of total net assets"
		
* Create norm indicators

	// Generate main grouping variable for iteration (with diagnostics)
		sort year_soi ntmaj12
		egen groupvar = group (year_soi ntmaj12)
		// tab groupvar
		// bysort groupvar: tab year_soi ntmaj12	
			
	* Generate median years of net assets variable 
		gen med_years_of_na	= .
		label var med_years_of_na "Median years of net assets by sector-year"
		// sum groupvar, meanonly
		// display r(max)
	// Check macros
		macro list _all
		macro drop _all
	// Obtain number of values of groupvar for iteration (can only use up through 2012, though)
		sum groupvar, meanonly
		display r(max)
		local groups = r(max)
		display "`groups'"
	// Loop to obtain medians within sector-year, weighted
		foreach sectoryear of num 1/`groups' {
			display "Sector-year indicator: " `sectoryear'
			// tab year_soi ntmaj12 if groupvar == `sectoryear'
			_pctile years_of_na if groupvar == `sectoryear' [pweight=weight], p(50)
			display "Median years of net assets within sector-year: " r(r1)
			local medianvalue = r(r1)
			recode med_years_of_na (miss = `medianvalue') if groupvar == `sectoryear'
		}

	* Create dummy based on whether above median for the sector-year	
		gen years_of_na_dummy = .
		label var years_of_na_dummy "Years of net assets dummy (above median for sector-year)"	
		replace years_of_na_dummy = 1 if years_of_na > med_years_of_na & years_of_na != . & med_years_of_na != . & year_soi < 2013
		replace years_of_na_dummy = 0 if years_of_na <= med_years_of_na & years_of_na != . & med_years_of_na != . & year_soi < 2013

	* Generate median fundraising ratio variable 
		gen med_fundr_ratio	= .
		label var med_fundr_ratio "Median fundraising ratio by sector-year"
		// sum groupvar, meanonly
		// display r(max)
	// Check macros
		macro list _all
		macro drop _all
	// Obtain number of values of groupvar for iteration (can only use up through 2012, though)
		sum groupvar, meanonly
		// display r(max)
		local groups = r(max)
		// display "`groups'"
	// Loop to obtain medians within sector-year, weighted
		foreach sectoryear of num 1/`groups' {
			display "Sector-year indicator: " `sectoryear'
			// tab year_soi ntmaj12 if groupvar == `sectoryear'
			_pctile fundr_ratio if groupvar == `sectoryear' [pweight=weight], p(50)
			display "Median fundraising ratio within sector-year: " r(r1)
			local medianvalue = r(r1)
			recode med_fundr_ratio (miss = `medianvalue') if groupvar == `sectoryear'
		}

	* Create dummy based on whether above median for the sector-year	
		gen fundr_ratio_dummy = .
		label var fundr_ratio_dummy "Fundraising ratio dummy (above median for sector-year)"	
		replace fundr_ratio_dummy = 1 if fundr_ratio > med_fundr_ratio & fundr_ratio != . & med_fundr_ratio != . & year_soi < 2013
		replace fundr_ratio_dummy = 0 if fundr_ratio <= med_fundr_ratio & fundr_ratio != . & med_fundr_ratio != . & year_soi < 2013		
	
	* Generate median administrative ratio variable 
		gen med_admin_ratio	= .
		label var med_admin_ratio "Median administrative ratio by sector-year"
		// sum groupvar, meanonly
		// display r(max)
	// Check macros
		macro list _all
		macro drop _all
	// Obtain number of values of groupvar for iteration (can only use up through 2012, though)
		sum groupvar, meanonly
		// display r(max)
		local groups = r(max)
		// display "`groups'"
	// Loop to obtain medians within sector-year, weighted
		foreach sectoryear of num 1/`groups' {
			display "Sector-year indicator: " `sectoryear'
			// tab year_soi ntmaj12 if groupvar == `sectoryear'
			_pctile admin_ratio if groupvar == `sectoryear' [pweight=weight], p(50)
			display "Median administrative ratio within sector-year: " r(r1)
			local medianvalue = r(r1)
			recode med_admin_ratio (miss = `medianvalue') if groupvar == `sectoryear'
		}

	* Create dummy based on whether above median for the sector-year	
		gen admin_ratio_dummy = .
		label var admin_ratio_dummy "Administrative ratio dummy (above median for sector-year)"	
		replace admin_ratio_dummy = 1 if admin_ratio > med_admin_ratio & admin_ratio != . & med_admin_ratio != . & year_soi < 2013
		replace admin_ratio_dummy = 0 if admin_ratio <= med_admin_ratio & admin_ratio != . & med_admin_ratio != . & year_soi < 2013

	* Generate median profit ratio variable 
		gen med_profit_ratio	= .
		label var med_profit_ratio "Median profit ratio by sector-year"
		// sum groupvar, meanonly
		// display r(max)
	// Check macros
		macro list _all
		macro drop _all
	// Obtain number of values of groupvar for iteration (can only use up to 2012, though)
		sum groupvar, meanonly
		// display r(max)
		local groups = r(max)
		// display "`groups'"
	// Loop to obtain medians within sector-year, weighted
		foreach sectoryear of num 1/`groups' {
			display "Sector-year indicator: " `sectoryear'
			// tab year_soi ntmaj12 if groupvar == `sectoryear'
			_pctile profit_ratio if groupvar == `sectoryear' [pweight=weight], p(50)
			display "Median profit ratio within sector-year: " r(r1)
			local medianvalue = r(r1)
			recode med_profit_ratio (miss = `medianvalue') if groupvar == `sectoryear'
		}

	* Create dummy based on whether above or below median for the sector-year	
		gen profit_ratio_dummy = .
		label var profit_ratio_dummy "Profit ratio dummy (above median for sector-year)"	
		replace profit_ratio_dummy = 1 if profit_ratio > med_profit_ratio & profit_ratio != . & med_profit_ratio != . & year_soi < 2013
		replace profit_ratio_dummy = 0 if profit_ratio <= med_profit_ratio & profit_ratio != . & med_profit_ratio != . & year_soi < 2013


	* Generate median interest expense ratio variable 
		gen med_int_exp_ratio	= .
		label var med_int_exp_ratio "Median interest expense ratio by sector-year"
		// sum groupvar, meanonly
		// display r(max)
	// Check macros
		macro list _all
		macro drop _all
	// Obtain number of values of groupvar for iteration (can only use up through 2012, though)
		sum groupvar, meanonly
		// display r(max)
		local groups = r(max)
		// display "`groups'"
	// Loop to obtain medians within sector-year, weighted
		foreach sectoryear of num 1/`groups' {
			display "Sector-year indicator: " `sectoryear'
			// tab year_soi ntmaj12 if groupvar == `sectoryear'
			_pctile int_exp_ratio if groupvar == `sectoryear' [pweight=weight], p(50)
			display "Median interest expense ratio within sector-year: " r(r1)
			local medianvalue = r(r1)
			recode med_int_exp_ratio (miss = `medianvalue') if groupvar == `sectoryear'
		}

	* Create dummy based on whether above or below median for the sector-year	
		gen int_exp_ratio_dummy = .
		label var int_exp_ratio_dummy "Interest expense ratio dummy (above median for sector-year)"	
		replace int_exp_ratio_dummy = 1 if int_exp_ratio > med_int_exp_ratio & int_exp_ratio != . & med_int_exp_ratio != . & year_soi < 2013
		replace int_exp_ratio_dummy = 0 if int_exp_ratio <= med_int_exp_ratio & int_exp_ratio != . & med_int_exp_ratio != . & year_soi < 2013

* Create corrected revenue diversification measure (HHI)
	* Create total revenue (calculated)
		gen tot_rev_calc_abs = abs(o_revenue) + abs(tot_invest_inc) + abs(prog_serv_rev) + abs(priv_contr) + abs(gov_grants) + abs(mem_dues)
		label var tot_rev_calc_abs "Total revenue (calcualted, sum of absolute values for HHI)"
	* Create HHI
		gen hhi_abs = (o_revenue/tot_rev_calc_abs)^2 + (tot_invest_inc/tot_rev_calc_abs)^2 + (prog_serv_rev/tot_rev_calc_abs)^2 + (priv_contr/tot_rev_calc_abs)^2 + (gov_grants/tot_rev_calc_abs)^2 + (mem_dues/tot_rev_calc_abs)^2
		label var hhi_abs "Herfindahl-Hirschman Index (raw, based on absolute values)"
			
	* Initialize median HHI variable 
		gen med_hhi_abs_dummy= .
		label var med_hhi_abs_dummy "Median HHI by sector-year (abs)"
		// sum groupvar, meanonly
		// display r(max)
	// Check macros
		macro list _all
		macro drop _all
	// Obtain number of values of groupvar for iteration (can only use up through 2012, though)
		sum groupvar, meanonly
		// display r(max)
		local groups = r(max)
		// display "`groups'"
	// Loop to obtain medians within sector-year, weighted
		foreach sectoryear of num 1/`groups' {
			display "Sector-year indicator: " `sectoryear'
			// tab year_soi ntmaj12 if groupvar == `sectoryear'
			_pctile hhi_abs if groupvar == `sectoryear' [pweight=weight], p(50)
			display "Median HHI within sector-year: " r(r1)
			local medianvalue = r(r1)
			recode med_hhi_abs_dummy (miss = `medianvalue') if groupvar == `sectoryear'
		}

	// Create dummy based on whether above median for the sector-year	
		gen hhi_abs_dummy = .
		label var hhi_abs_dummy "HHI dummy (above median for sector-year; abs)"	
		replace hhi_abs_dummy = 1 if hhi_abs > med_hhi_abs & hhi_abs != . & med_hhi_abs != . & year_soi < 2013
		replace hhi_abs_dummy = 0 if hhi_abs <= med_hhi_abs & hhi_abs != . & med_hhi_abs != . & year_soi < 2013
	
* Misc. clean up
	drop inv_incm_tot_rev
	
